knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(broom)
library(ggplot2)
library(splines)
library(splines2)
library(ggplot2)
library(olsrr)
## 
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
## 
##     rivers
library(mice)
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(DescTools)
summ.MNfit <- function(fit, digits=3){
  s <- summary(fit)
  for(i in 2:length(fit$lev))
  {
    ##
    cat("\nLevel", fit$lev[i], "vs. Level", fit$lev[1], "\n")
    ##
    betaHat <- s$coefficients[(i-1),]
    se <- s$standard.errors[(i-1),]
    zStat <- betaHat /se
    pval <- 2*pnorm(abs(zStat), lower.tail = FALSE)
    ##
    RRR <- exp(betaHat)
    RRR.lower <- exp(betaHat - qnorm(0.975)*se)
    RRR.upper <- exp(betaHat + qnorm(0.975)*se)
    ##
    results <- cbind(betaHat, se, pval, RRR, RRR.lower, RRR.upper)
    print(round(results, digits=digits))
  }  
}
diabetes <- read.csv("diabetes.csv")
head(diabetes)
##     id chol stab.glu hdl ratio glyhb   location age gender height weight  frame
## 1 1000  203       82  56   3.6  4.31 Buckingham  46 female     62    121 medium
## 2 1001  165       97  24   6.9  4.44 Buckingham  29 female     64    218  large
## 3 1002  228       92  37   6.2  4.64 Buckingham  58 female     61    256  large
## 4 1003   78       93  12   6.5  4.63 Buckingham  67   male     67    119  large
## 5 1005  249       90  28   8.9  7.72 Buckingham  64   male     68    183 medium
## 6 1008  248       94  69   3.6  4.81 Buckingham  34   male     71    190  large
##   bp.1s bp.1d bp.2s bp.2d waist hip time.ppn
## 1   118    59    NA    NA    29  38      720
## 2   112    68    NA    NA    46  48      360
## 3   190    92   185    92    49  57      180
## 4   110    50    NA    NA    33  38      480
## 5   138    80    NA    NA    44  41      300
## 6   132    86    NA    NA    36  42      195

Missing Data

# Imputation
diabetes$outcome[diabetes$glyhb < 5.70] <- 1
diabetes$outcome[diabetes$glyhb >=5.70 & diabetes$glyhb <=6.50] <-2
diabetes$outcome[diabetes$glyhb >6.50] <-3

diabetes2 <- diabetes %>% mutate("diabetes"=ifelse(diabetes$glyhb > 6.50, 1,0)) %>% 
  mutate("BMI"=(weight*0.453592)/(height*0.0254)^2) %>% mutate("w_h_ratio"=waist/hip) %>% 
  mutate("lnglyhb"=log(glyhb)) %>%
  select(c("id","glyhb", "lnglyhb","w_h_ratio","age","BMI","chol","hdl" ,"diabetes", "outcome","gender"))
head(diabetes2)
##     id glyhb  lnglyhb w_h_ratio age      BMI chol hdl diabetes outcome gender
## 1 1000  4.31 1.460938 0.7631579  46 22.13094  203  56        0       1 female
## 2 1001  4.44 1.490654 0.9583333  29 37.41920  165  24        0       1 female
## 3 1002  4.64 1.534714 0.8596491  58 48.37024  228  37        0       1 female
## 4 1003  4.63 1.532557 0.8684211  67 18.63783   78  12        0       1   male
## 5 1005  7.72 2.043814 1.0731707  64 27.82475  249  28        1       3   male
## 6 1008  4.81 1.570697 0.8571429  34 26.49933  248  69        0       1   male
md.pattern(diabetes2)
##     id age gender chol hdl w_h_ratio BMI glyhb lnglyhb diabetes outcome   
## 381  1   1      1    1   1         1   1     1       1        1       1  0
## 13   1   1      1    1   1         1   1     0       0        0       0  4
## 6    1   1      1    1   1         1   0     1       1        1       1  1
## 2    1   1      1    1   1         0   1     1       1        1       1  1
## 1    1   1      1    0   0         1   1     1       1        1       1  2
##      0   0      0    1   1         2   6    13      13       13      13 62
tempData <- mice(diabetes2, m=5, method = 'pmm', seed=96324)
## 
##  iter imp variable
##   1   1  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   1   2  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   1   3  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   1   4  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   1   5  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   2   1  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   2   2  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   2   3  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   2   4  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   2   5  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   3   1  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   3   2  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   3   3  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   3   4  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   3   5  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   4   1  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   4   2  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   4   3  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   4   4  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   4   5  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   5   1  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   5   2  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   5   3  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   5   4  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
##   5   5  glyhb  lnglyhb  w_h_ratio  BMI  chol  hdl  diabetes  outcome
## Warning: Number of logged events: 1
dat_imp <- complete(tempData,action=1)  #join the first dataset
head(dat_imp)
##     id glyhb  lnglyhb w_h_ratio age      BMI chol hdl diabetes outcome gender
## 1 1000  4.31 1.460938 0.7631579  46 22.13094  203  56        0       1 female
## 2 1001  4.44 1.490654 0.9583333  29 37.41920  165  24        0       1 female
## 3 1002  4.64 1.534714 0.8596491  58 48.37024  228  37        0       1 female
## 4 1003  4.63 1.532557 0.8684211  67 18.63783   78  12        0       1   male
## 5 1005  7.72 2.043814 1.0731707  64 27.82475  249  28        1       3   male
## 6 1008  4.81 1.570697 0.8571429  34 26.49933  248  69        0       1   male
xyplot(tempData, lnglyhb ~ w_h_ratio + age+ chol+BMI)
densityplot(tempData)

# Plot with complete dataset (joining the first dataset)
dat_imp$female[dat_imp$gender=="female"] <- 1
dat_imp$female[dat_imp$gender=="male"] <- 0
hist(dat_imp$lnglyhb, main = "Histogram of imputated log(glycosylated Hemoglobin)")
scatter.smooth(dat_imp$w_h_ratio, dat_imp$lnglyhb, col="brown", xlab = "waist/hip ratio", 
               ylab = "log(Glycosylated Hemoglobin)")
boxplot(dat_imp$glyhb~ dat_imp$female, col="orange",xlab = "Female=1", ylab = "Glycosylated Hemoglobin")

Modeling

Diabetes damages arteries and makes them targets for hardening, called atherosclerosis, which can then cause high blood pressure. \(\\\) Diabetes confounders: age, gender, TC, BMI \(\\\) Hypertension confounders: serum total cholesterol (TC), serum HDL-cholesterol (HDL-C), body mass index (BMI) \(\\\)

1. Linear models

dat <- diabetes %>% mutate(w_h_ratio=waist/hip) %>% filter(!is.na(w_h_ratio) & !is.na(glyhb) & !is.na(chol))
dat$female[dat$gender=="female"] <- 1
dat$female[dat$gender=="male"] <- 0
dat["BMI"]=(dat$weight*0.453592)/(dat$height*0.0254)^2
dat <- dat %>% filter(!is.na(BMI))
dat <- dat[order(dat$BMI),]

scatter.smooth(dat$w_h_ratio, dat$glyhb, col="brown", xlab = "waist/hip ratio", 
               ylab = "Glycosylated Hemoglobin")
scatter.smooth(dat$age, dat$glyhb, col="blue", xlab = "Age", 
               ylab = "Glycosylated Hemoglobin")
boxplot(dat$glyhb~ dat$female, col="orange",xlab = "Female=1", ylab = "Glycosylated Hemoglobin")
hist(dat$glyhb, xlab = "Glycosolated Hemoglobin", main = "Histogram of glycosylated hemoglobin", col="gray")
hist(dat$w_h_ratio,xlab="Waist/hip ratio", main="Histogram of waist/hip ratio", col="blue")

# log transformaion on glycosylated hemoglobin (glyhb)
dat$ln_glyhb <- log(dat$glyhb)
scatter.smooth(dat$w_h_ratio, dat$ln_glyhb, col="brown", xlab = "waist/hip ratio", 
               ylab = "ln(Glycosylated Hemoglobin)")
hist(dat$ln_glyhb, xlab = "ln(Glycosolated Hemoglobin)", main = "Histogram of ln(glycosylated hemoglobin)", col="gray")

library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following objects are masked from 'package:DescTools':
## 
##     MAE, RMSE
## The following object is masked from 'package:purrr':
## 
##     lift
# preprocl <- preProcess(dat[,c("w_h_ratio", "chol", "BMI", "age","ln_glyhb")],method = c("range"))
#dat_nor <- predict(preprocl, dat[,c("w_h_ratio", "chol", "BMI", "age", "ln_glyhb")])
#hist(dat_nor$ln_glyhb)

# mod.normal <- lm(ln_glyhb ~ w_h_ratio + age + chol+I(chol^2)+BMI+I(BMI^2), data=dat_nor)

linear model coefficients

mod.wh <- lm(ln_glyhb ~ w_h_ratio,data=dat)
mod.age <- lm(ln_glyhb ~ age, data=dat)
mod.wh2 <- lm(ln_glyhb ~ w_h_ratio + I(w_h_ratio^2), data=dat) #no need
mod.wh_age <- lm(ln_glyhb ~ w_h_ratio + age, data=dat)
mod.wh_gender <- lm(ln_glyhb ~ w_h_ratio + female, data=dat)
mod.wh2_age <- lm(ln_glyhb ~ w_h_ratio + I(w_h_ratio^2)+ age, data=dat)
mod.wh2_age2 <- lm(ln_glyhb ~ w_h_ratio + I(w_h_ratio^2)+age+I(age^2), data=dat)
mod.int <- lm(ln_glyhb ~ w_h_ratio + age + w_h_ratio*age, data=dat) #no need
mod.spline_age <- lm(ln_glyhb~ bSpline(w_h_ratio, df=4) +age , data=dat)
mod.nspline_age <- lm(ln_glyhb~ ns(w_h_ratio, df=4) + age,data=dat)
mod.spline <- lm(ln_glyhb~bSpline(w_h_ratio,df=4), data=dat)
mod.wh_age_chol <- lm(ln_glyhb ~ w_h_ratio + age+ chol, data=dat)
mod.wh_age_chol_bmi <- lm(ln_glyhb ~ w_h_ratio + age+ chol + BMI, data=dat)
mod.wh_age_chol2_bmi <- lm(ln_glyhb~ w_h_ratio+age+chol+I(chol^2)+BMI, data=dat)
mod.wh_age_chol2_bmi2 <- lm(ln_glyhb~ w_h_ratio+age+chol+I(chol^2)+BMI+I(BMI^2), data=dat)
mod.int2 <- lm(ln_glyhb ~ w_h_ratio + age + chol + BMI + chol*w_h_ratio, data=dat) #no need
mod.int3 <- lm(ln_glyhb ~ w_h_ratio + age + chol+ BMI + BMI*w_h_ratio,data=dat) #no need
tidy(mod.wh) %>% mutate('2.5%'=confint(mod.wh)[,1]) %>% mutate('97.5%'=confint(mod.wh)[,2])
tidy(mod.wh2) %>% mutate('2.5%'=confint(mod.wh2)[,1]) %>% mutate('97.5%'=confint(mod.wh2)[,2])
tidy(mod.age) %>% mutate('2.5%'=confint(mod.age)[,1]) %>% mutate('97.5%'=confint(mod.age)[,2])
tidy(mod.wh_age) %>% mutate('2.5%'=confint(mod.wh_age)[,1]) %>% mutate('97.5%'=confint(mod.wh_age)[,2])
tidy(mod.wh_gender) %>% mutate('2.5%'=confint(mod.wh_gender)[,1]) %>% mutate('97.5%'=confint(mod.wh_gender)[,2])
tidy(mod.wh2_age) %>% mutate('2.5%'=confint(mod.wh2_age)[,1]) %>% mutate('97.5%'=confint(mod.wh2_age)[,2])
tidy(mod.wh2_age2) %>% mutate('2.5%'=confint(mod.wh2_age2)[,1]) %>% mutate('97.5%'=confint(mod.wh2_age2)[,2])
tidy(mod.int) #no need
tidy(mod.spline_age) %>% mutate('2.5%'=confint(mod.spline_age)[,1]) %>% mutate('97.5%'=confint(mod.spline_age)[,2])
tidy(mod.nspline_age) %>% mutate('2.5%'=confint(mod.nspline_age)[,1]) %>% mutate('97.5%'=confint(mod.nspline_age)[,2])
tidy(mod.wh_age_chol)
tidy(mod.wh_age_chol_bmi)
tidy(mod.wh_age_chol2_bmi)
tidy(mod.wh_age_chol2_bmi2) %>% mutate('2.5%'=confint(mod.wh_age_chol2_bmi2)[,1]) %>%
  mutate('97.5%'=confint(mod.wh_age_chol2_bmi2)[,2])
tidy(mod.int2) #no need
tidy(mod.int3) #no need
anova(mod.wh_age,mod.spline_age)
anova(mod.wh_age, mod.nspline_age)
#imputated models
mod.imp.wh <- with(tempData, lm(lnglyhb ~ w_h_ratio))
mod.imp.wh_age <- with(tempData,lm(lnglyhb ~ w_h_ratio+age))
mod.imp.wh_age_chol <- with(tempData, lm(lnglyhb ~w_h_ratio + age+chol))
mod.imp.wh_age_chol2_bmi2 <- with(tempData, lm(lnglyhb ~ w_h_ratio +age+chol+I(chol^2)+BMI+I(BMI^2)))

summary(pool(mod.imp.wh_age_chol2_bmi2))
##          term      estimate    std.error statistic       df      p.value
## 1 (Intercept)  9.770420e-01 3.150126e-01  3.101597 325.7748 2.093038e-03
## 2   w_h_ratio  3.762357e-01 2.038268e-01  1.845860 385.8979 6.567824e-02
## 3         age  6.315271e-03 9.145093e-04  6.905639 382.6038 2.090861e-11
## 4        chol -4.240503e-03 1.718675e-03 -2.467310 392.2344 1.403945e-02
## 5   I(chol^2)  1.148041e-05 3.673835e-06  3.124912 393.3280 1.910499e-03
## 6         BMI  2.239944e-02 1.477827e-02  1.515701 165.8391 1.314987e-01
## 7    I(BMI^2) -2.562823e-04 2.287887e-04 -1.120170 191.8205 2.640419e-01
pool.r.squared(mod.imp.wh_age_chol2_bmi2)
##           est     lo 95     hi 95 fmi
## R^2 0.2199923 0.1509404 0.2942487 NaN
pool.compare(mod.imp.wh_age_chol2_bmi2, mod.imp.wh_age_chol, method = "wald")
## Warning: 'pool.compare' is deprecated.
## Use 'D1' instead.
## See help("Deprecated")
## $call
## pool.compare(fit1 = mod.imp.wh_age_chol2_bmi2, fit0 = mod.imp.wh_age_chol, 
##     method = "wald")
## 
## $call11
## with.mids(data = tempData, expr = lm(lnglyhb ~ w_h_ratio + age + 
##     chol + I(chol^2) + BMI + I(BMI^2)))
## 
## $call12
## mice(data = diabetes2, m = 5, method = "pmm", seed = 96324)
## 
## $call01
## with.mids(data = tempData, expr = lm(lnglyhb ~ w_h_ratio + age + 
##     chol))
## 
## $call02
## mice(data = diabetes2, m = 5, method = "pmm", seed = 96324)
## 
## $method
## [1] "wald"
## 
## $nmis
##        id     glyhb   lnglyhb w_h_ratio       age       BMI      chol       hdl 
##         0        13        13         2         0         6         1         1 
##  diabetes   outcome    gender 
##        13        13         0 
## 
## $m
## [1] 5
## 
## $qbar1
##   (Intercept)     w_h_ratio           age          chol     I(chol^2) 
##  9.770420e-01  3.762357e-01  6.315271e-03 -4.240503e-03  1.148041e-05 
##           BMI      I(BMI^2) 
##  2.239944e-02 -2.562823e-04 
## 
## $qbar0
## (Intercept)   w_h_ratio         age        chol 
## 0.701572500 0.505540445 0.005913854 0.001150394 
## 
## $ubar1
## [1] 9.515211e-02 4.111503e-02 8.254534e-07 2.943818e-06 1.347658e-11
## [6] 1.938050e-04 4.721607e-08
## 
## $ubar0
## [1] 3.246926e-02 4.126520e-02 8.487819e-07 1.072253e-07
## 
## $deviances
## NULL
## 
## $Dm
##          [,1]
## [1,] 4.250771
## 
## $rm
## [1] 0.0790081
## 
## $df1
## [1] 3
## 
## $df2
## [1] 1070.747
## 
## $pvalue
##             [,1]
## [1,] 0.005375078

Linear model compare

plot(mod.wh_age_chol2_bmi2,which=c(1,2,3))
# Cook's Distance Plot
ols_plot_cooksd_chart(mod.wh_age_chol_bmi)  #id 2778, influential & outlier
# DFFITS Plot
ols_plot_dffits(mod.wh_age_chol_bmi)
# Leverage Plot (outlier)
ols_plot_resid_lev(mod.wh_age_chol_bmi) 

dat %>% filter(id==2778)  # very high glyhb & chol in terms of her age
##     id chol stab.glu hdl ratio glyhb   location age gender height weight  frame
## 1 2778  443      185  23  19.3 14.31 Buckingham  51 female     70    235 medium
##   bp.1s bp.1d bp.2s bp.2d waist hip time.ppn outcome w_h_ratio female      BMI
## 1   158    98   148    88    43  48      420       3 0.8958333      1 33.71862
##   ln_glyhb
## 1 2.660959
dat_eli <- dat %>% filter(!id==2778)
mod.eli <- lm(ln_glyhb~ w_h_ratio +age+chol+I(chol^2)+BMI+I(BMI^2), data=dat_eli)

R^2 Adjusted R^2 Square root of MSE AIC BIC
ln_glyhb ~ w_h_ratio 0.0573543 0.0548671 0.3018287 174.4346 186.2630
ln_glyhb ~ age 0.1439173 0.1416585 0.2876366 137.7354 149.5638
ln_glyhb ~ w_h_ratio + age 0.1627443 0.1583143 0.2844562 131.2629 147.0341
ln_glyhb ~ w_h_ratio + I(w_h_ratio^2) + age 0.1627459 0.1560834 0.2844559 133.2622 152.9762
ln_glyhb ~ w_h_ratio + I(w_h_ratio^2) + age + I(age^2) 0.1679803 0.1591290 0.2835653 132.8728 156.5295
ln_glyhb ~ bSpline(w_h_ratio) + age 0.1645070 0.1533671 0.2841565 136.4599 164.0595
ln_glyhb ~ ns(w_h_ratio) + age 0.1628954 0.1517340 0.2844305 137.1942 164.7937
ln_glyhb ~ w_h_ratio + age + chol 0.1931040 0.1866830 0.2792512 119.1908 138.9048
ln_glyhb ~ w_h_ratio + age + chol+ bmi 0.2100817 0.2016783 0.2762977 113.0887 136.7455
ln_glyhb ~ w_h_ratio + age + chol + I(chol^2)+ bmi 0.2259944 0.2156744 0.2735006 107.3352 134.9348
ln_glyhb ~ w_h_ratio + age + chol+ I(chol^2) + bmi+I(bmi^2) 0.2285425 0.2161662 0.2730500 108.0788 139.6212
ln_glyhb ~ eliminate influential point 0.2091315 0.1964098 0.2730504 107.8381 139.3595

ggplot(data=dat, aes(x=w_h_ratio,y=ln_glyhb))+
  geom_point(alpha=0.5,col=ifelse(dat$female==1,"hot pink","blue"))+
  geom_line(aes(y=predict(mod.wh_gender),col=ifelse(dat$female==1,"wh+female","wh+male")))+
  geom_line(aes(y=predict(mod.wh),col="wh"))+
  geom_line(aes(y=predict(mod.wh2),col="wh2"))+
  scale_color_manual(name="Models", values = c("wh"="green", "wh+male"="blue", "wh+female"="hot pink",
                                               "wh2"="black"))+
  theme_bw()
## Warning: Use of `dat$female` is discouraged. Use `female` instead.

2. Logistic, Multinomial, Ordinal models

  1. Logistic model– have/don’t have diabetes Have diabetes if value of glycosylated hemoglobin > 6.50 DCCT%, otherwise, don’t have diabetes. Then, the outcome would be binomial, either have or don’t have the diabetes.
dat$diabetes = ifelse(dat$glyhb > 6.50, 1,0)  #logistic model

mod_lg <- glm(diabetes ~ w_h_ratio, family=binomial(),data=dat)
mod_lg2 <- glm(diabetes ~ w_h_ratio + I(w_h_ratio^2), family=binomial(), data=dat)
tidy(mod_lg) %>% mutate('2.5%'=confint(mod_lg)[,1]) %>% mutate('97.5%'=confint(mod_lg)[,2])
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## # A tibble: 2 x 7
##   term        estimate std.error statistic    p.value `2.5%` `97.5%`
##   <chr>          <dbl>     <dbl>     <dbl>      <dbl>  <dbl>   <dbl>
## 1 (Intercept)    -8.10      1.73     -4.69 0.00000277 -11.6    -4.78
## 2 w_h_ratio       7.25      1.90      3.81 0.000138     3.57   11.1
tidy(mod_lg2) %>% mutate('2.5%'=confint(mod_lg2)[,1]) %>% mutate('97.5%'=confint(mod_lg2)[,2])
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## # A tibble: 3 x 7
##   term           estimate std.error statistic p.value `2.5%` `97.5%`
##   <chr>             <dbl>     <dbl>     <dbl>   <dbl>  <dbl>   <dbl>
## 1 (Intercept)       -25.8      15.6     -1.65  0.0982  -59.0    2.94
## 2 w_h_ratio          45.9      33.8      1.36  0.174   -16.6  118.  
## 3 I(w_h_ratio^2)    -21.0      18.3     -1.15  0.251   -59.7   12.9
anova(mod_lg, mod_lg2, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: diabetes ~ w_h_ratio
## Model 2: diabetes ~ w_h_ratio + I(w_h_ratio^2)
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       379     326.62                     
## 2       378     325.21  1   1.4153   0.2342
#imputated models
mod.imp.lg <- with(data = tempData, glm(diabetes ~ w_h_ratio, family=binomial()))
mod.imp.lg_chol_bmi <- with(data=tempData, glm(diabetes ~ w_h_ratio+age+chol+BMI, family=binomial()))
summary(pool(mod.imp.lg_chol_bmi))
##          term      estimate   std.error statistic       df      p.value
## 1 (Intercept) -10.864499534 2.011600755 -5.400922 386.2862 1.161069e-07
## 2   w_h_ratio   3.690012889 2.002281353  1.842904 378.0120 6.612578e-02
## 3         age   0.046822866 0.010177551  4.600603 204.8704 7.374940e-06
## 4        chol   0.008985799 0.003172021  2.832831 354.5841 4.877496e-03
## 5         BMI   0.054200020 0.023166062  2.339630 184.8318 2.037072e-02
pool.compare(mod.imp.lg_chol_bmi, mod.imp.lg,method="likelihood")
## Warning: 'pool.compare' is deprecated.
## Use 'D1' instead.
## See help("Deprecated")
## $call
## pool.compare(fit1 = mod.imp.lg_chol_bmi, fit0 = mod.imp.lg, method = "likelihood")
## 
## $call11
## with.mids(data = tempData, expr = glm(diabetes ~ w_h_ratio + 
##     age + chol + BMI, family = binomial()))
## 
## $call12
## mice(data = diabetes2, m = 5, method = "pmm", seed = 96324)
## 
## $call01
## with.mids(data = tempData, expr = glm(diabetes ~ w_h_ratio, family = binomial()))
## 
## $call02
## mice(data = diabetes2, m = 5, method = "pmm", seed = 96324)
## 
## $method
## [1] "likelihood"
## 
## $nmis
##        id     glyhb   lnglyhb w_h_ratio       age       BMI      chol       hdl 
##         0        13        13         2         0         6         1         1 
##  diabetes   outcome    gender 
##        13        13         0 
## 
## $m
## [1] 5
## 
## $qbar1
##   (Intercept)     w_h_ratio           age          chol           BMI 
## -10.864499534   3.690012889   0.046822866   0.008985799   0.054200020 
## 
## $qbar0
## (Intercept)   w_h_ratio 
##   -7.570741    6.675263 
## 
## $ubar1
## [1] 3.999443e+00 3.939423e+00 9.408652e-05 9.764473e-06 4.819528e-04
## 
## $ubar0
## [1] 2.831070 3.452928
## 
## $deviances
## $deviances$dev1.M
## [1] 297.9033 312.9882 302.1334 308.0085 301.6038
## 
## $deviances$dev0.M
## [1] 342.9807 356.4104 345.6270 348.6405 349.7884
## 
## $deviances$dev1.L
## [1] 297.9948 313.1486 302.1584 308.2401 301.8658
## 
## $deviances$dev0.L
## [1] 342.9813 356.4313 345.6333 348.6510 349.7927
## 
## 
## $Dm
## [1] 13.6769
## 
## $rm
## [1] 0.07276765
## 
## $df1
## [1] 3
## 
## $df2
## [1] 1244.414
## 
## $pvalue
## [1] 8.829767e-09
plot(dat$diabetes~dat$w_h_ratio,ylab="P(Diabetes)",xlab="waist/hip ratio", col="black")
lines(mod_lg$fitted.values~dat$w_h_ratio,type='p',col='blue') 
lines(mod_lg2$fitted.values~ dat$w_h_ratio, type='p', col="red")
legend('bottomright',legend=c("linear","quadratic"),pch=c(1,1),col=c("blue","red"))

mod_lg3 <- glm(diabetes ~ w_h_ratio + age, family=binomial(),data=dat)
mod_lg4<- glm(diabetes ~ w_h_ratio + age + w_h_ratio*age, family=binomial(),data=dat)
mod_lg5 <- glm(diabetes ~ w_h_ratio + age + chol + BMI, family=binomial(), data=dat)
mod_lg6 <- glm(diabetes ~ w_h_ratio+age+chol+I(chol^2)+BMI, family = binomial(),data=dat)
mod_lg7 <- glm(diabetes ~ w_h_ratio+age+chol+I(chol^2)+BMI+I(BMI^2),family=binomial(),data=dat)
mod_int1 <- glm(diabetes ~ w_h_ratio +age+chol+BMI + w_h_ratio*chol, family = binomial(),data=dat) #no need
mod_int2 <- glm(diabetes ~ w_h_ratio + age + chol + BMI+w_h_ratio * BMI, family = binomial(),data=dat) #no need
library(ResourceSelection)
library(LogisticDx)
tidy(mod_lg3) %>% mutate('2.5%'=confint(mod_lg3)[,1]) %>% mutate('97.5%'=confint(mod_lg3)[,2])
tidy(mod_lg4) %>% mutate('2.5%'=confint(mod_lg4)[,1]) %>% mutate('97.5%'=confint(mod_lg4)[,2])  #no need
tidy(mod_lg5)
tidy(mod_lg6)
tidy(mod_lg7)
tidy(mod_int1) #no need
tidy(mod_int2) # no need

locoef<-exp(coef(mod_lg5)[2]*0.1)
exp(log(locoef) + c(-1, 1)*1.96*0.1*2.037622)

hoslem.test(dat$diabetes, fitted(mod_lg5), g=10)
gof(mod_lg5)

#imputated model
mod.imp.lg3 <- with(data = tempData, glm(diabetes~ w_h_ratio+age, family = binomial()))
mod.imp.lg5 <- with(data=tempData, glm(diabetes ~ w_h_ratio +age+ chol+BMI, family = binomial()))
summary(pool(mod.imp.lg5))
pool.compare(mod.imp.lg5, mod.imp.lg3, method = "likelihood")
AIC BIC Pseudo-R2
logit(P_diabetes) ~ w_h_ratio 330.6225 338.5081 0.0441627
logit(P_diabetes) ~ w_h_ratio + age 304.5589 316.3873 0.1262887
logit(P_diabetes) ~ w_h_ratio+ age+chol+bmi 291.9100 311.6240 0.1750106
logit(P_diabetes) ~ w_h_ratio+age+chol+I(chol^2)+bmi 293.2562 316.9130 0.1769237
logit(P_diabetes) ~ w_h_ratio + age + chol + I(chol^2)+ bmi+I(bmi^2) 293.1390 320.7386 0.1831196

Multinomial Models

Based on the 2010 American Diabetes Association Standards of Medical Care in Diabetes, HbA1C < 5.7\(\%\) is diagnosed as normal, 5.7-6.4\(\%\) is diagnosed as prediabetes and > 6.5\(\%\) is diagnosed as diabetes.

library(nnet)
# outcome =1: normal
# outcome =2: prediabetes
# outcome =3: diabetes

dat$outcome[dat$glyhb < 5.70] <- 1
dat$outcome[dat$glyhb >=5.70 & dat$glyhb <=6.50] <-2
dat$outcome[dat$glyhb >6.50] <-3

mod.mul_wh <- multinom(outcome ~ w_h_ratio, data=dat)
## # weights:  9 (4 variable)
## initial  value 418.571282 
## iter  10 value 243.333915
## iter  20 value 242.883177
## final  value 242.882318 
## converged
summ.MNfit(mod.mul_wh)
## 
## Level 2 vs. Level 1 
##             betaHat    se  pval      RRR RRR.lower   RRR.upper
## (Intercept) -11.091 2.634 0.000     0.00     0.000       0.003
## w_h_ratio     9.589 2.867 0.001 14596.57    52.923 4025880.330
## 
## Level 3 vs. Level 1 
##             betaHat    se pval      RRR RRR.lower  RRR.upper
## (Intercept)  -8.991 1.799    0    0.000     0.000      0.004
## w_h_ratio     8.353 1.984    0 4241.562    86.887 207059.411
mod.mul_wh_age <- multinom(outcome ~ w_h_ratio+age, data=dat)
## # weights:  12 (6 variable)
## initial  value 418.571282 
## iter  10 value 226.917988
## iter  20 value 223.943778
## final  value 223.918303 
## converged
summ.MNfit(mod.mul_wh_age)
## 
## Level 2 vs. Level 1 
##             betaHat    se  pval      RRR RRR.lower  RRR.upper
## (Intercept) -11.196 2.657 0.000    0.000     0.000      0.003
## w_h_ratio     7.268 2.947 0.014 1433.815     4.445 462511.301
## age           0.044 0.014 0.002    1.045     1.016      1.074
## 
## Level 3 vs. Level 1 
##             betaHat    se  pval     RRR RRR.lower RRR.upper
## (Intercept)  -9.268 1.870 0.000   0.000     0.000     0.004
## w_h_ratio     5.638 2.086 0.007 281.010     4.712 16756.945
## age           0.053 0.010 0.000   1.054     1.034     1.075
mod.mul_int <- multinom(outcome~ w_h_ratio + age+w_h_ratio*age, data=dat)
## # weights:  15 (8 variable)
## initial  value 418.571282 
## iter  10 value 235.145054
## iter  20 value 225.068841
## iter  30 value 223.441915
## iter  40 value 223.356107
## iter  50 value 223.202476
## iter  60 value 223.197788
## final  value 223.195536 
## converged
summ.MNfit(mod.mul_int)
## 
## Level 2 vs. Level 1 
##               betaHat    se  pval       RRR RRR.lower   RRR.upper
## (Intercept)   -14.891 1.788 0.000     0.000     0.000       0.000
## w_h_ratio      11.415 1.911 0.000 90657.952  2143.235 3834793.577
## age             0.114 0.055 0.036     1.121     1.007       1.248
## w_h_ratio:age  -0.079 0.057 0.165     0.924     0.827       1.033
## 
## Level 3 vs. Level 1 
##               betaHat    se  pval         RRR RRR.lower    RRR.upper
## (Intercept)   -17.687 7.003 0.012       0.000     0.000 1.900000e-02
## w_h_ratio      15.031 7.753 0.053 3370579.635     0.848 1.340151e+13
## age             0.202 0.119 0.089       1.224     0.970 1.544000e+00
## w_h_ratio:age  -0.166 0.131 0.205       0.847     0.656 1.095000e+00
anova(mod.mul_wh_age, mod.mul_int, test="Chisq")
## Likelihood ratio tests of Multinomial Models
## 
## Response: outcome
##                               Model Resid. df Resid. Dev   Test    Df LR stat.
## 1                   w_h_ratio + age       756   447.8366                      
## 2 w_h_ratio + age + w_h_ratio * age       754   446.3911 1 vs 2     2 1.445534
##     Pr(Chi)
## 1          
## 2 0.4854072
mod.mul_wh_age_chol_bmi <- multinom(outcome ~ w_h_ratio + age + chol + BMI, data=dat)
## # weights:  18 (10 variable)
## initial  value 418.571282 
## iter  10 value 222.562958
## iter  20 value 215.503183
## final  value 215.503008 
## converged
summ.MNfit(mod.mul_wh_age_chol_bmi)
## 
## Level 2 vs. Level 1 
##             betaHat    se  pval     RRR RRR.lower  RRR.upper
## (Intercept) -11.217 2.961 0.000   0.000     0.000      0.004
## w_h_ratio     6.886 2.968 0.020 978.477     2.915 328472.200
## age           0.045 0.015 0.002   1.046     1.017      1.076
## chol         -0.002 0.005 0.750   0.998     0.988      1.009
## BMI           0.022 0.035 0.524   1.023     0.955      1.096
## 
## Level 3 vs. Level 1 
##             betaHat    se  pval     RRR RRR.lower RRR.upper
## (Intercept) -12.747 2.166 0.000   0.000     0.000     0.000
## w_h_ratio     5.232 2.122 0.014 187.167     2.926 11974.528
## age           0.053 0.010 0.000   1.054     1.033     1.076
## chol          0.009 0.003 0.006   1.009     1.003     1.016
## BMI           0.063 0.023 0.008   1.065     1.017     1.114
co2vs1<-exp(coef(mod.mul_wh_age_chol_bmi)[1,2]*0.1)
co3vs1<-exp(coef(mod.mul_wh_age_chol_bmi)[2,2]*0.1)
exp(log(co2vs1) + c(-1, 1)*1.96*0.1*summary(mod.mul_wh_age_chol_bmi)$standard.errors[1,2])
## [1] 1.112899 3.561677
exp(log(co3vs1) + c(-1, 1)*1.96*0.1*summary(mod.mul_wh_age_chol_bmi)$standard.errors[2,2])
## [1] 1.113312 2.557580
anova(mod.mul_wh_age, mod.mul_wh_age_chol_bmi, test="Chisq")
## Likelihood ratio tests of Multinomial Models
## 
## Response: outcome
##                          Model Resid. df Resid. Dev   Test    Df LR stat.
## 1              w_h_ratio + age       756   447.8366                      
## 2 w_h_ratio + age + chol + BMI       752   431.0060 1 vs 2     4 16.83059
##       Pr(Chi)
## 1            
## 2 0.002085055
mod.mul_wh_age_chol2_bmi <- multinom(outcome ~ w_h_ratio + age + chol+I(chol^2) + BMI, data=dat) #no need
## # weights:  21 (12 variable)
## initial  value 418.571282 
## iter  10 value 225.327789
## iter  20 value 215.108498
## final  value 215.048531 
## converged
anova(mod.mul_wh_age_chol_bmi,mod.mul_wh_age_chol2_bmi, test="Chisq")  #no need
## Likelihood ratio tests of Multinomial Models
## 
## Response: outcome
##                                      Model Resid. df Resid. Dev   Test    Df
## 1             w_h_ratio + age + chol + BMI       752   431.0060             
## 2 w_h_ratio + age + chol + I(chol^2) + BMI       750   430.0971 1 vs 2     2
##    LR stat.   Pr(Chi)
## 1                    
## 2 0.9089532 0.6347801
mod.mul_wh_age_chol2_bmi2 <- multinom(outcome ~ w_h_ratio + age + chol+I(chol^2) + BMI+I(BMI^2), data=dat) #no need
## # weights:  24 (14 variable)
## initial  value 418.571282 
## iter  10 value 240.966870
## iter  20 value 214.198693
## iter  30 value 213.959969
## final  value 213.959965 
## converged
anova(mod.mul_wh_age_chol_bmi,mod.mul_wh_age_chol2_bmi2, test="Chisq") #no need
## Likelihood ratio tests of Multinomial Models
## 
## Response: outcome
##                                                 Model Resid. df Resid. Dev
## 1                        w_h_ratio + age + chol + BMI       752   431.0060
## 2 w_h_ratio + age + chol + I(chol^2) + BMI + I(BMI^2)       748   427.9199
##     Test    Df LR stat.   Pr(Chi)
## 1                                
## 2 1 vs 2     4 3.086087 0.5435238
#imputated models
mod.imp.mul_wh <- with(data=tempData, multinom(outcome ~ w_h_ratio))
## # weights:  9 (4 variable)
## initial  value 442.740752 
## iter  10 value 258.238838
## iter  20 value 257.872597
## final  value 257.872097 
## converged
## # weights:  9 (4 variable)
## initial  value 442.740752 
## iter  10 value 259.360601
## iter  20 value 258.922465
## final  value 258.921860 
## converged
## # weights:  9 (4 variable)
## initial  value 442.740752 
## iter  10 value 260.002134
## iter  20 value 259.629534
## final  value 259.629034 
## converged
## # weights:  9 (4 variable)
## initial  value 442.740752 
## iter  10 value 258.279088
## iter  20 value 257.847393
## final  value 257.846741 
## converged
## # weights:  9 (4 variable)
## initial  value 442.740752 
## iter  10 value 256.220717
## iter  20 value 255.781036
## final  value 255.780362 
## converged
mod.imp.mul_wh_age <- with(data=tempData,multinom(outcome~w_h_ratio + age))
## # weights:  12 (6 variable)
## initial  value 442.740752 
## iter  10 value 240.840542
## iter  20 value 237.674551
## final  value 237.663111 
## converged
## # weights:  12 (6 variable)
## initial  value 442.740752 
## iter  10 value 242.778870
## iter  20 value 240.644084
## final  value 240.626569 
## converged
## # weights:  12 (6 variable)
## initial  value 442.740752 
## iter  10 value 243.503945
## iter  20 value 240.621748
## final  value 240.608292 
## converged
## # weights:  12 (6 variable)
## initial  value 442.740752 
## iter  10 value 241.496619
## iter  20 value 238.341687
## final  value 238.326388 
## converged
## # weights:  12 (6 variable)
## initial  value 442.740752 
## iter  10 value 237.149969
## iter  20 value 234.854788
## final  value 234.837024 
## converged
mod.imp.mul_wh_age_chol_bmi <- with(data=tempData, multinom(outcome ~ w_h_ratio +age+chol+BMI))
## # weights:  18 (10 variable)
## initial  value 442.740752 
## iter  10 value 237.210361
## iter  20 value 230.351727
## final  value 230.351559 
## converged
## # weights:  18 (10 variable)
## initial  value 442.740752 
## iter  10 value 239.165144
## iter  20 value 232.207976
## final  value 232.207774 
## converged
## # weights:  18 (10 variable)
## initial  value 442.740752 
## iter  10 value 239.106805
## iter  20 value 232.381706
## final  value 232.381555 
## converged
## # weights:  18 (10 variable)
## initial  value 442.740752 
## iter  10 value 238.290846
## iter  20 value 231.739167
## final  value 231.738904 
## converged
## # weights:  18 (10 variable)
## initial  value 442.740752 
## iter  10 value 233.088931
## iter  20 value 226.412468
## final  value 226.412102 
## converged
table_multi<- matrix(c(AIC(mod.mul_wh), BIC(mod.mul_wh),
                  AIC(mod.mul_wh_age ),BIC(mod.mul_wh_age ),
                  AIC(mod.mul_int), BIC(mod.mul_int),
                  AIC(mod.mul_wh_age_chol_bmi), BIC(mod.mul_wh_age_chol_bmi)
                  ), ncol=2,nrow=4, byrow=TRUE)
colnames(table_multi)<- c("AIC", "BIC")
rownames(table_multi)<- c("log(RRR) ~ w_h_ratio",
                     "log(RRR) ~ w_h_ratio + age",
                     "log(RRR) ~ w_h_ratio + age+w_h_ratio*age",
                     "log(RRR) ~ w_h_ratio + age + chol + BMI")

kable(table_multi)
AIC BIC
log(RRR) ~ w_h_ratio 493.7646 509.5358
log(RRR) ~ w_h_ratio + age 459.8366 483.4934
log(RRR) ~ w_h_ratio + age+w_h_ratio*age 462.3911 493.9335
log(RRR) ~ w_h_ratio + age + chol + BMI 451.0060 490.4340

plot(mod.mul_wh$fitted.values[,1][order(dat$w_h_ratio)] ~ sort(dat$w_h_ratio), type="l", col="dodgerblue", xlab=c("Waist/hip ratio"), ylab="Predicted Probability", ylim=c(0,1), main="Fitted Values, o1=Blue, o2=Pink, o3=Green")
points(mod.mul_wh$fitted.values[,2][order(dat$w_h_ratio)] ~ sort(dat$w_h_ratio), type="l", col="magenta")
points(mod.mul_wh$fitted.values[,3][order(dat$w_h_ratio)]~sort(dat$w_h_ratio), type="l", col="green")

Ordinal Model

library(VGAM)
## Loading required package: stats4
## 
## Attaching package: 'VGAM'
## The following object is masked from 'package:caret':
## 
##     predictors
## The following object is masked from 'package:tidyr':
## 
##     fill
# outcome =1: normal
# outcome =2: prediabetes
# outcome =3: diabetes

ord.wh <- vglm(outcome ~ w_h_ratio, cumulative(parallel=TRUE, reverse=TRUE), data=dat)
ord.wh_age <- vglm(outcome~ w_h_ratio + age, cumulative(parallel=TRUE, reverse=TRUE), data=dat)
ord.int <- vglm(outcome~w_h_ratio + age + w_h_ratio *age, cumulative(parallel = TRUE, reverse = TRUE),data=dat)
ord.wh_age_chol_bmi <- vglm(outcome ~ w_h_ratio + age + chol+ BMI, cumulative(parallel = TRUE, reverse = TRUE), data=dat)
ord.wh_age_chol2_bmi <- vglm(outcome ~ w_h_ratio + age + chol+I(chol^2)+ BMI, cumulative(parallel = TRUE, reverse = TRUE), data=dat)  #no need
ord.wh_age_chol2_bmi2 <- vglm(outcome ~ w_h_ratio + age + chol+I(chol^2)+ BMI+I(BMI^2), cumulative(parallel = TRUE, reverse = TRUE), data=dat)  #no need

summary(ord.int)
## 
## Call:
## vglm(formula = outcome ~ w_h_ratio + age + w_h_ratio * age, family = cumulative(parallel = TRUE, 
##     reverse = TRUE), data = dat)
## 
## Coefficients: 
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept):1 -17.0987     6.3642      NA       NA   
## (Intercept):2 -17.5656     6.3665  -2.759   0.0058 **
## w_h_ratio      14.9580     7.0567   2.120   0.0340 * 
## age             0.1982     0.1090   1.819   0.0689 . 
## w_h_ratio:age  -0.1658     0.1202  -1.379   0.1679   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
## 
## Residual deviance: 448.1292 on 757 degrees of freedom
## 
## Log-likelihood: -224.0646 on 757 degrees of freedom
## 
## Number of Fisher scoring iterations: 5 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
## 
## 
## Exponentiated coefficients:
##     w_h_ratio           age w_h_ratio:age 
##  3.134458e+06  1.219196e+00  8.472518e-01
summary(ord.wh_age_chol_bmi)
## 
## Call:
## vglm(formula = outcome ~ w_h_ratio + age + chol + BMI, family = cumulative(parallel = TRUE, 
##     reverse = TRUE), data = dat)
## 
## Coefficients: 
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1 -11.453072   1.844428  -6.210 5.31e-10 ***
## (Intercept):2 -11.942093   1.855078  -6.438 1.21e-10 ***
## w_h_ratio       5.288779   1.830869   2.889  0.00387 ** 
## age             0.048605   0.008860   5.486 4.12e-08 ***
## chol            0.007463   0.002885   2.587  0.00969 ** 
## BMI             0.049193   0.020344   2.418  0.01560 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
## 
## Residual deviance: 436.1374 on 756 degrees of freedom
## 
## Log-likelihood: -218.0687 on 756 degrees of freedom
## 
## Number of Fisher scoring iterations: 5 
## 
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):1'
## 
## 
## Exponentiated coefficients:
##  w_h_ratio        age       chol        BMI 
## 198.101337   1.049805   1.007491   1.050423
confint(ord.wh_age_chol_bmi)
##                       2.5 %      97.5 %
## (Intercept):1 -15.068084033 -7.83805940
## (Intercept):2 -15.577979316 -8.30620665
## w_h_ratio       1.700340926  8.87721648
## age             0.031239361  0.06597006
## chol            0.001808504  0.01311682
## BMI             0.009320564  0.08906582
exp(coef(ord.wh_age_chol_bmi)[3]) 
## w_h_ratio 
##  198.1013
or2vs1<-exp(coef(ord.wh_age_chol_bmi)[3]*0.1)
exp(coef(ord.wh_age_chol_bmi)[3]*0.1 + c(-1, 1)*1.96*0.1*sqrt(vcov(ord.wh_age_chol_bmi)[3,3]))
## [1] 1.185337 2.429604
table_ord<- matrix(c(AIC(ord.wh), BIC(ord.wh),
                  AIC(ord.wh_age),BIC(ord.wh_age),
                  AIC(ord.int), BIC(ord.int),
                  AIC(ord.wh_age_chol_bmi), BIC(ord.wh_age_chol_bmi),
                  AIC(ord.wh_age_chol2_bmi),BIC(ord.wh_age_chol2_bmi),
                  AIC(ord.wh_age_chol2_bmi2), BIC(ord.wh_age_chol2_bmi2)
                  ), ncol=2,nrow=6, byrow=TRUE)
colnames(table_ord)<- c("AIC", "BIC")
rownames(table_ord)<- c("log(OR) ~ w_h_ratio", 
                     "log(OR) ~ w_h_ratio + age",
                     "log(OR) ~ w_h_ratio + age + w_h_ratio*age",
                     "log(OR) ~ w_h_ratio+ age+chol+bmi",
                     "log(OR) ~ w_h_ratio+age+chol+I(chol^2)+bmi",
                     "log(OR) ~ w_h_ratio + age + chol + I(chol^2)+ bmi+I(bmi^2)")
table1 <- as.table(table_ord)
kable(table_ord)
AIC BIC
log(OR) ~ w_h_ratio 493.0312 504.8596
log(OR) ~ w_h_ratio + age 458.0683 473.8395
log(OR) ~ w_h_ratio + age + w_h_ratio*age 458.1292 477.8432
log(OR) ~ w_h_ratio+ age+chol+bmi 448.1374 471.7942
log(OR) ~ w_h_ratio+age+chol+I(chol^2)+bmi 448.6343 476.2339
log(OR) ~ w_h_ratio + age + chol + I(chol^2)+ bmi+I(bmi^2) 449.7714 481.3138
# check for proportional odds assumption 
dat$indicator3 <- ifelse(dat$outcome==3, 1,0) 
mod.3v12 <- glm(indicator3 ~ w_h_ratio +age+chol+BMI, family = binomial, data = dat)
summary(mod.3v12)
## 
## Call:
## glm(formula = indicator3 ~ w_h_ratio + age + chol + BMI, family = binomial, 
##     data = dat)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5446  -0.5734  -0.3827  -0.2251   2.6566  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -11.651098   2.077498  -5.608 2.04e-08 ***
## w_h_ratio     4.187441   2.037622   2.055  0.03987 *  
## age           0.047304   0.010042   4.711 2.47e-06 ***
## chol          0.009573   0.003241   2.954  0.00314 ** 
## BMI           0.059550   0.022953   2.594  0.00947 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 341.71  on 380  degrees of freedom
## Residual deviance: 281.91  on 376  degrees of freedom
## AIC: 291.91
## 
## Number of Fisher Scoring iterations: 5
dat$indicator23 <- ifelse(dat$outcome==1, 0 ,1)
mod.32v1 <- glm(indicator23 ~ w_h_ratio + age+chol+BMI, family=binomial,data=dat )
#examining 95% CIs for the two coefficients
coef(mod.3v12)[2] + c(-1,1)*1.96*sqrt(vcov(mod.3v12)[2,2])
## [1] 0.1937014 8.1811804
coef(mod.3v12)[3] + c(-1,1)*1.96*sqrt(vcov(mod.3v12)[3,3])
## [1] 0.02762239 0.06698582
coef(mod.3v12)[4] + c(-1,1)*1.96*sqrt(vcov(mod.3v12)[4,4])
## [1] 0.003220756 0.015924463
coef(mod.3v12)[5] + c(-1,1)*1.96*sqrt(vcov(mod.3v12)[5,5])
## [1] 0.01456302 0.10453762
coef(mod.32v1)[2] + c(-1,1)*1.96*sqrt(vcov(mod.32v1)[2,2])
## [1] 2.014893 9.487432
coef(mod.32v1)[3] + c(-1,1)*1.96*sqrt(vcov(mod.32v1)[3,3])
## [1] 0.03231843 0.06819021
coef(mod.32v1)[4] + c(-1,1)*1.96*sqrt(vcov(mod.32v1)[4,4])
## [1] 0.0004873271 0.0121938689
coef(mod.32v1)[5] + c(-1,1)*1.96*sqrt(vcov(mod.32v1)[5,5])
## [1] 0.009750427 0.091846086
# CI’s for all variables' beta coefficients overlap, so proportional odds assumption holds
# Another way
# Fitting a generalized ordinal model (without the proportional odds assumption)
mod.no_asp <- vglm(outcome ~ w_h_ratio + age + chol+BMI, cumulative(parallel=FALSE, reverse=T), data=dat)
# Conducting the likelihood ratio test
pchisq(deviance(ord.wh_age_chol_bmi) - deviance(mod.no_asp), df=df.residual(ord.wh_age_chol_bmi)-df.residual(mod.no_asp), lower.tail=F)

3. Poisson models

Model Summary

table3<- matrix(c(AIC(mod.wh_age_chol2_bmi2), BIC(mod.wh_age_chol2_bmi2),
                  AIC(mod_lg5),BIC(mod_lg5),
                  AIC(mod.mul_wh_age_chol_bmi), BIC(mod.mul_wh_age_chol_bmi),
                  AIC(ord.wh_age_chol_bmi), BIC(ord.wh_age_chol_bmi)
                  ), ncol=2,nrow=4, byrow=TRUE)
colnames(table3)<- c("AIC", "BIC")
rownames(table3)<- c("ln(glyhb) ~ w_h_ratio + age + chol + chol^2+ BMI + BMI^2",
                     "logit(P_diabetes) ~ w_h_ratio + age + chol + BMI",
                     "multinomial(P_diabetes) ~ w_h_ratio + age + chol + BMI",
                     "ordinal(P_diabetes) ~ w_h_ratio + age + chol + BMI")
table1 <- as.table(table3)
kable(table3)
AIC BIC
ln(glyhb) ~ w_h_ratio + age + chol + chol^2+ BMI + BMI^2 108.0788 139.6212
logit(P_diabetes) ~ w_h_ratio + age + chol + BMI 291.9100 311.6240
multinomial(P_diabetes) ~ w_h_ratio + age + chol + BMI 451.0060 490.4340
ordinal(P_diabetes) ~ w_h_ratio + age + chol + BMI 448.1374 471.7942